## Read the mail-ballot application and ballot files first: only voters whose VUID appears in one of them are kept from the voter file

# State-level mail-ballot application files (PIR 22-0331, batch 1).
# Only the VUID column is retained from each workbook.

# Rejected applications. Rows whose COUNTY_NAME is "TRAVIS" are excluded here;
# a separate Travis County rejection file is read further down the script.
rej_app <- "data/PIR 22-0331 - Batch 1/2-Individual-Mail-Application-Rejected.xlsx" |>
  read_excel() |>
  filter(COUNTY_NAME != "TRAVIS") |>
  select(VUID)

# Requested applications (no county filter applied).
req_app <- "data/PIR 22-0331 - Batch 1/1-Individual-Mail-Application-Requested.xlsx" |>
  read_excel() |>
  select(VUID)

## individual counties

# County-supplied application-rejection files (Travis, El Paso, Webb).
# Only the voter ID is needed from each, so it is selected first and then
# coerced to numeric. The original code also created a COUNTY_NAME column in
# every pipeline, but it was dropped by the very next select(); that dead
# assignment has been removed.
# NOTE(review): the numeric coercion presumably aligns the ID type with the
# state files so the frames can be stacked with bind_rows() -- confirm.

travis_rej <- read_excel("data/individual_counties_abbms/travisCANNOT_SHARE/064.127323 (CONFIDENTIAL) 2022 P22 App Rejections (FINAL).xlsx") |>
  select(VUID) |>
  mutate(VUID = as.numeric(VUID))

el_paso_rej <- read_excel("data/individual_counties_abbms/el_pasoCANNOT_SHARE/EPC004725.XLSX") |>
  select(VUID) |>
  mutate(VUID = as.numeric(VUID))

# The Webb workbook stores the ID under Voters_StateVoterID; rename it to VUID
# so all sources share one column name.
webb_rej <- read_excel("data/individual_counties_abbms/Webb - clean.xlsx") |>
  select(VUID = Voters_StateVoterID) |>
  mutate(VUID = as.numeric(VUID))

# State-level mail-ballot files (PIR 22-0331, batch 2); VUID only.

# Accepted mail ballots.
acc <- "data/PIR 22-0331 - Batch 2/4-Individual-Mail-Ballot-Accepted.xlsx" |>
  read_excel() |>
  select(VUID)

# Rejected mail ballots.
rej <- "data/PIR 22-0331 - Batch 2/3-Individual-Mail-Ballot-Rejected.xlsx" |>
  read_excel() |>
  select(VUID)

# Stack every source into one single-column table of VUIDs. Duplicates are
# not removed; the table is only used for a membership test on the voter file.
pop <- bind_rows(
  list(rej_app, travis_rej, el_paso_rej, webb_rej, req_app, acc, rej)
)
#############################
#############################
#############################
#############################

# Path to the tab-delimited demographic voter file (machine-specific).
f <- "C:/Users/Researcher 2/Desktop/VM2--TX--2022-05-07/VM2--TX--2022-05-07-DEMOGRAPHIC.tab"

# Load only the columns used downstream, then keep voters who (a) appear in
# `pop` by state voter ID and (b) have both residence coordinates present.
k <- fread(
  f,
  sep = "\t",
  select = c(
    "LALVOTERID", "Voters_Active",
    "Voters_StateVoterID",
    "Residence_Addresses_Latitude",
    "Residence_Addresses_Longitude",
    "Voters_Gender", "Voters_Age", "Parties_Description",
    "Voters_FIPS", "Voters_LastName",
    "CommercialData_EstimatedHHIncome",
    "CommercialData_Education"
  )
) |>
  filter(
    Voters_StateVoterID %in% pop$VUID,
    !is.na(Residence_Addresses_Longitude),
    !is.na(Residence_Addresses_Latitude)
  )

# cleanup() is defined outside this section.
# NOTE(review): presumably it clears the workspace except for the named
# object -- confirm against its definition.
cleanup("k")

# Assign each voter to a 2020 census block with a point-in-polygon overlay.
# The Texas block polygons are requested from tigris as an sp object.
blocks <- tigris::blocks(state = "TX", year = 2020, class = "sp")

# Voter residence coordinates as sp points (x = longitude, y = latitude).
# The points are labelled with the blocks' CRS string; nothing is reprojected.
# NOTE(review): assumes the voter-file coordinates are already expressed in
# the same CRS as the block polygons -- confirm.
pings  <- SpatialPoints(k[,c('Residence_Addresses_Longitude','Residence_Addresses_Latitude')],
                        proj4string = blocks@proj4string)

# over() yields one row of block attributes per point; the block identifier is
# stored on k. Rows left NA here are dropped later by filter(!is.na(GEOID)).
# NOTE(review): 2020 TIGER block layers may name this column GEOID20, in which
# case `$GEOID` only resolves through data.frame partial matching -- confirm.
k$GEOID <- over(pings, blocks)$GEOID
  
# Checkpoint the geocoded voter table.
saveRDS(k, "temp/tx_blocks.rds")

# Block-level 2020 census counts for Texas. The download is written to disk
# and then read straight back from the saved copy.
# NOTE(review): "INSERT YOUR KEY" is a placeholder; a real Census API key is
# needed for the download call to succeed.
cens <- get_census_data(
  key = "INSERT YOUR KEY",
  states = "TX",
  year = 2020,
  census.geo = "block"
)
saveRDS(cens, "temp/tx_block_wru_2020.rds")

cens <- readRDS("temp/tx_block_wru_2020.rds")

# Prepare the voter table for predict_race(): keep voters that matched a
# block, expose the last name as `surname`, and split the block GEOID into
# county (chars 3-5), tract (6-11) and block (12-15) codes. Then predict race
# using the block-level census data.
k <- k |>
  filter(!is.na(GEOID)) |>
  rename(surname = Voters_LastName) |>
  mutate(
    state = "TX",
    county = substr(GEOID, 3, 5),
    tract = substr(GEOID, 6, 11),
    block = substr(GEOID, 12, 15)
  ) |>
  predict_race(
    census.data = cens,
    year = "2020",
    census.geo = "block"
  )

# Share of voters with a missing pred.asi value (sanity check).
print(mean(is.na(k$pred.asi)))

# Release the census tables before loading the vote-history file.
rm(cens)
gc()
############################

# Vote history from the 2022-05-07 voter-file snapshot: turnout flags and
# ballot types for primaries and generals, keyed by LALVOTERID. Only the
# listed columns are read.
hist <- fread("C:/Users/Researcher 2/Desktop/VM2--TX--2022-05-07/VM2--TX--2022-05-07-VOTEHISTORY.tab",
              sep = "\t",
              select = c("LALVOTERID", 
                         "BallotType_Primary_2022_03_01",
                         "BallotType_Primary_2020_03_03",
                         "BallotType_Primary_2018_03_06",
                         "BallotType_Primary_2016_03_01",
                         "BallotType_Primary_2014_03_04",
                         "BallotType_Primary_2012_05_29",
                         "BallotType_Primary_2010_03_02",
                         "BallotType_Primary_2008_03_04",
                         "BallotType_Primary_2006_03_07",
                         "BallotType_Primary_2004_03_09",
                         "BallotType_Primary_2002_03_12",
                         "BallotType_Primary_2000_03_14",
                         "Primary_2022_03_01",
                         "PRI_BLT_2022_03_01",
                         "General_2020_11_03",
                         "General_2002_11_05",
                         "General_2006_11_07",
                         "General_2010_11_02",
                         "General_2012_11_06",
                         "General_2014_11_04",
                         "General_2016_11_08",
                         "General_2018_11_06",
                         "BallotType_General_2002_11_05",
                         "BallotType_General_2006_11_07",
                         "BallotType_General_2010_11_02",
                         "BallotType_General_2012_11_06",
                         "BallotType_General_2014_11_04",
                         "BallotType_General_2016_11_08",
                         "BallotType_General_2018_11_06",
                         "BallotType_General_2020_11_03",
                         "Primary_2010_03_02",
                         "Primary_2012_05_29",
                         "Primary_2014_03_04",
                         "Primary_2016_03_01",
                         "Primary_2018_03_06",
                         "Primary_2020_03_03"))

# Keep only voters present in both tables. The key is stated explicitly:
# LALVOTERID is the only column the two tables share, so this matches the
# previous implicit natural join while silencing the "Joining with" message
# and guarding against a future shared column silently changing the key.
tot <- inner_join(k, hist, by = "LALVOTERID")

# cleanup() is defined outside this section.
# NOTE(review): presumably it clears the workspace except for the named
# object -- confirm against its definition.
cleanup("tot")

# November 2022 general-election turnout and ballot type, taken from the later
# (2023-03-12) voter-file snapshot.
l <- fread("C:/Users/Researcher 2/Desktop/VM2--TX--2023-03-12/VM2--TX--2023-03-12-VOTEHISTORY.tab",
           select = c("LALVOTERID", "General_2022_11_08",
                      "BallotType_General_2022_11_08"))

# Attach the 2022 general columns to every voter in `tot`.
# dereg_general flags voters for whom General_2022_11_08 is NA after the join
# (e.g. no matching LALVOTERID in the later snapshot). It must be computed
# BEFORE the NA values are blanked out, and it must test the column itself:
# the previous is.na("General_2022_11_08") tested a string literal and was
# therefore always FALSE.
tot <- left_join(tot, l, by = "LALVOTERID") |>
  mutate(dereg_general = is.na(General_2022_11_08),
         across(ends_with("General_2022_11_08"), \(x) ifelse(is.na(x), "", x)))

# cleanup() is defined outside this section.
# NOTE(review): presumably it clears the workspace except for the named
# object -- confirm against its definition.
cleanup("tot")

# March 2024 primary participation, from a custom L2 web-portal export.
# BallotType_Primary_2024_03_05 is derived as:
#   "Absentee"  - returned-ballot status is "Returned by Mail"
#   "In Person" - voted in the primary with any other (non-missing) status
#   ""          - everything else
# Primary_2024_03_05 is then recoded from 1 to "Y" (anything else -> "").
l <- fread("data/X_1I0I08L6O_submitters_CSV_CUSTOM.csv") |> ## downloaded from L2 web portal
  mutate(BallotType_Primary_2024_03_05 = case_when(C_1A1P08NJ0_Returned_Ballot_Status == "Returned by Mail" ~ "Absentee",
                                                   # Disabled alternative rule, kept for reference:
                                                   # Primary_2024_03_05 == 1 & C_1A1P08NJ0_Returned_Ballot_Status == "" ~ "Unknown",
                                                   Primary_2024_03_05 == 1  & (C_1A1P08NJ0_Returned_Ballot_Status != "Returned by Mail") ~ "In Person",
                                                   TRUE ~ ""),
         Primary_2024_03_05 = ifelse(Primary_2024_03_05 == 1, "Y", "")) |>
  select(LALVOTERID, BallotType_Primary_2024_03_05, Primary_2024_03_05)

# Attach the 2024 primary columns. dereg_primary flags voters for whom
# Primary_2024_03_05 is NA after the join; it is computed before the NA values
# are replaced with "". The join key is explicit (LALVOTERID is the only
# shared column).
# NOTE(review): if Primary_2024_03_05 can be NA inside the export itself, the
# ifelse() above keeps it NA and those voters are also flagged here -- confirm
# that this is intended.
tot <- left_join(tot, l, by = "LALVOTERID") |>
  mutate(dereg_primary = is.na(Primary_2024_03_05),
         across(ends_with("Primary_2024_03_05"), \(x) ifelse(is.na(x), "", x)))

# Replace Voters_StateVoterID with a character copy named `vuid`: the new
# column is appended at the end and the original column is removed in the
# same step.
tot <- mutate(
  tot,
  vuid = as.character(Voters_StateVoterID),
  Voters_StateVoterID = NULL
)

# Persist the combined voter-level table.
saveRDS(tot, "temp/combined.rds")
